{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Intraday Factor"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "In this notebook we use Alphalens to analyse the performance of an intraday factor, which is computed daily but the stocks are bought at marker open and sold at market close with no overnight positions."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Imports & Settings"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2021-04-19T18:51:14.070853Z",
     "start_time": "2021-04-19T18:51:14.066776Z"
    },
    "pycharm": {
     "name": "#%%\n"
    }
   },
   "source": [
    "import warnings\n",
    "warnings.filterwarnings('ignore')"
   ],
   "outputs": []
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2021-04-19T18:51:14.085970Z",
     "start_time": "2021-04-19T18:51:14.075670Z"
    }
   },
   "source": [
    "import alphalens\n",
    "import pandas as pd"
   ],
   "outputs": []
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2021-04-19T18:51:14.095404Z",
     "start_time": "2021-04-19T18:51:14.087648Z"
    }
   },
   "source": [
    "%matplotlib inline"
   ],
   "outputs": []
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Loading Data"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Below is a simple mapping of tickers to sectors for a small universe of large cap stocks."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2021-04-19T18:51:14.105582Z",
     "start_time": "2021-04-19T18:51:14.096816Z"
    }
   },
   "source": [
    "sector_names = {\n",
    "    0 : \"information_technology\",\n",
    "    1 : \"financials\",\n",
    "    2 : \"health_care\",\n",
    "    3 : \"industrials\",\n",
    "    4 : \"utilities\", \n",
    "    5 : \"real_estate\", \n",
    "    6 : \"materials\", \n",
    "    7 : \"telecommunication_services\", \n",
    "    8 : \"consumer_staples\", \n",
    "    9 : \"consumer_discretionary\", \n",
    "    10 : \"energy\" \n",
    "}\n",
    "\n",
    "ticker_sector = {\n",
    "    \"ACN\" : 0, \"ATVI\" : 0, \"ADBE\" : 0, \"AMD\" : 0, \"AKAM\" : 0, \"ADS\" : 0, \"GOOGL\" : 0, \"GOOG\" : 0, \n",
    "    \"APH\" : 0, \"ADI\" : 0, \"ANSS\" : 0, \"AAPL\" : 0, \"AMAT\" : 0, \"ADSK\" : 0, \"ADP\" : 0, \"AVGO\" : 0,\n",
    "    \"AMG\" : 1, \"AFL\" : 1, \"ALL\" : 1, \"AXP\" : 1, \"AIG\" : 1, \"AMP\" : 1, \"AON\" : 1, \"AJG\" : 1, \"AIZ\" : 1, \"BAC\" : 1,\n",
    "    \"BK\" : 1, \"BBT\" : 1, \"BRK.B\" : 1, \"BLK\" : 1, \"HRB\" : 1, \"BHF\" : 1, \"COF\" : 1, \"CBOE\" : 1, \"SCHW\" : 1, \"CB\" : 1,\n",
    "    \"ABT\" : 2, \"ABBV\" : 2, \"AET\" : 2, \"A\" : 2, \"ALXN\" : 2, \"ALGN\" : 2, \"AGN\" : 2, \"ABC\" : 2, \"AMGN\" : 2, \"ANTM\" : 2,\n",
    "    \"BCR\" : 2, \"BAX\" : 2, \"BDX\" : 2, \"BIIB\" : 2, \"BSX\" : 2, \"BMY\" : 2, \"CAH\" : 2, \"CELG\" : 2, \"CNC\" : 2, \"CERN\" : 2,\n",
    "    \"MMM\" : 3, \"AYI\" : 3, \"ALK\" : 3, \"ALLE\" : 3, \"AAL\" : 3, \"AME\" : 3, \"AOS\" : 3, \"ARNC\" : 3, \"BA\" : 3, \"CHRW\" : 3,\n",
    "    \"CAT\" : 3, \"CTAS\" : 3, \"CSX\" : 3, \"CMI\" : 3, \"DE\" : 3, \"DAL\" : 3, \"DOV\" : 3, \"ETN\" : 3, \"EMR\" : 3, \"EFX\" : 3,\n",
    "    \"AES\" : 4, \"LNT\" : 4, \"AEE\" : 4, \"AEP\" : 4, \"AWK\" : 4, \"CNP\" : 4, \"CMS\" : 4, \"ED\" : 4, \"D\" : 4, \"DTE\" : 4,\n",
    "    \"DUK\" : 4, \"EIX\" : 4, \"ETR\" : 4, \"ES\" : 4, \"EXC\" : 4, \"FE\" : 4, \"NEE\" : 4, \"NI\" : 4, \"NRG\" : 4, \"PCG\" : 4,\n",
    "    \"ARE\" : 5, \"AMT\" : 5, \"AIV\" : 5, \"AVB\" : 5, \"BXP\" : 5, \"CBG\" : 5, \"CCI\" : 5, \"DLR\" : 5, \"DRE\" : 5,\n",
    "    \"EQIX\" : 5, \"EQR\" : 5, \"ESS\" : 5, \"EXR\" : 5, \"FRT\" : 5, \"GGP\" : 5, \"HCP\" : 5, \"HST\" : 5, \"IRM\" : 5, \"KIM\" : 5,\n",
    "    \"APD\" : 6, \"ALB\" : 6, \"AVY\" : 6, \"BLL\" : 6, \"CF\" : 6, \"DWDP\" : 6, \"EMN\" : 6, \"ECL\" : 6, \"FMC\" : 6, \"FCX\" : 6,\n",
    "    \"IP\" : 6, \"IFF\" : 6, \"LYB\" : 6, \"MLM\" : 6, \"MON\" : 6, \"MOS\" : 6, \"NEM\" : 6, \"NUE\" : 6, \"PKG\" : 6, \"PPG\" : 6,\n",
    "    \"T\" : 7, \"CTL\" : 7, \"VZ\" : 7, \n",
    "    \"MO\" : 8, \"ADM\" : 8, \"BF.B\" : 8, \"CPB\" : 8, \"CHD\" : 8, \"CLX\" : 8, \"KO\" : 8, \"CL\" : 8, \"CAG\" : 8,\n",
    "    \"STZ\" : 8, \"COST\" : 8, \"COTY\" : 8, \"CVS\" : 8, \"DPS\" : 8, \"EL\" : 8, \"GIS\" : 8, \"HSY\" : 8, \"HRL\" : 8,\n",
    "    \"AAP\" : 9, \"AMZN\" : 9, \"APTV\" : 9, \"AZO\" : 9, \"BBY\" : 9, \"BWA\" : 9, \"KMX\" : 9, \"CCL\" : 9, \n",
    "    \"APC\" : 10, \"ANDV\" : 10, \"APA\" : 10, \"BHGE\" : 10, \"COG\" : 10, \"CHK\" : 10, \"CVX\" : 10, \"XEC\" : 10, \"CXO\" : 10,\n",
    "    \"COP\" : 10, \"DVN\" : 10, \"EOG\" : 10, \"EQT\" : 10, \"XOM\" : 10, \"HAL\" : 10, \"HP\" : 10, \"HES\" : 10, \"KMI\" : 10\n",
    "}"
   ],
   "outputs": []
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## YFinance Download"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2021-04-19T18:51:21.241423Z",
     "start_time": "2021-04-19T18:51:14.107318Z"
    }
   },
   "source": [
    "import yfinance as yf\n",
    "import pandas_datareader.data as web\n",
    "yf.pdr_override()\n",
    "\n",
    "tickers = list(ticker_sector.keys())\n",
    "df = web.get_data_yahoo(tickers, start='2017-01-01',  end='2017-06-01')\n",
    "df.index = pd.to_datetime(df.index, utc=True)"
   ],
   "outputs": []
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2021-04-19T18:51:21.253717Z",
     "start_time": "2021-04-19T18:51:21.242284Z"
    }
   },
   "source": [
    "df = df.stack()\n",
    "df.index.names = ['date', 'asset']\n",
    "df.info()"
   ],
   "outputs": []
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Factor Computation"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Our example factor ranks the stocks based on their overnight price gap (yesterday close to today open price). We'll  see if the factor has some alpha or if it is pure noise."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2021-04-19T18:51:21.259633Z",
     "start_time": "2021-04-19T18:51:21.254738Z"
    }
   },
   "source": [
    "available_tickers = df.index.unique('asset')\n",
    "ticker_sector = {k: v for k, v in ticker_sector.items() if k in available_tickers}"
   ],
   "outputs": []
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2021-04-19T18:51:21.272024Z",
     "start_time": "2021-04-19T18:51:21.261385Z"
    }
   },
   "source": [
    "today_open = df.loc[:, 'Open'].unstack('asset')\n",
    "today_close = df.loc[:, 'Close'].unstack('asset')\n",
    "yesterday_close = today_close.shift(1)"
   ],
   "outputs": []
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2021-04-19T18:51:21.276157Z",
     "start_time": "2021-04-19T18:51:21.273553Z"
    }
   },
   "source": [
    "factor = (today_open - yesterday_close) / yesterday_close"
   ],
   "outputs": []
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "The pricing data passed to alphalens should contain the entry price for the assets so it must reflect the next available price after a factor value was observed at a given timestamp. Those prices must not be used in the calculation of the factor values for that time. Always double check to ensure you are not introducing lookahead bias to your study.\n",
    "\n",
    "The pricing data must also contain the exit price for the assets, for period 1 the price at the next timestamp will be used, for period 2 the price after 2 timestamps will be used and so on.\n",
    "\n",
    "There are no restrinctions/assumptions on the time frequencies a factor should be computed at and neither on the specific time a factor should be traded (trading at the open vs trading at the close vs intraday trading), it is only required that factor and price DataFrames are properly aligned given the rules above.\n",
    "\n",
    "In our example, we want to buy the stocks at marker open, so the need the open price at the exact timestamps as the factor valules, and we want to sell the stocks at market close so we will add the close prices too, which will be used to compute period 1 forward returns as they appear just after the factor values timestamps. The returns computed by Alphalens will therefore be based on the difference between open to close assets prices.\n",
    "\n",
    "If we had other prices we could compute other period returns, for example one hour after market open and 2 hours and so on. We could have added those prices right after the open prices and instruct Alphalens to compute 1, 2, 3... periods too  and not only period 1 like in this example."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Data Formatting"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Time Adjustments"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2021-04-19T18:51:21.285341Z",
     "start_time": "2021-04-19T18:51:21.277049Z"
    }
   },
   "source": [
    "# Fix time as Yahoo doesn't set it\n",
    "today_open.index += pd.Timedelta('9h30m')\n",
    "today_close.index += pd.Timedelta('16h')\n",
    "# pricing will contain both open and close\n",
    "pricing = pd.concat([today_open, today_close]).sort_index()"
   ],
   "outputs": []
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2021-04-19T18:51:21.314071Z",
     "start_time": "2021-04-19T18:51:21.286210Z"
    }
   },
   "source": [
    "pricing.head()"
   ],
   "outputs": []
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Align Factor & Price"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2021-04-19T18:51:21.320284Z",
     "start_time": "2021-04-19T18:51:21.315441Z"
    }
   },
   "source": [
    "# Align factor to open price\n",
    "factor.index += pd.Timedelta('9h30m')\n",
    "factor = factor.stack()\n",
    "factor.index = factor.index.set_names(['date', 'asset'])"
   ],
   "outputs": []
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2021-04-19T18:51:21.352176Z",
     "start_time": "2021-04-19T18:51:21.321710Z"
    }
   },
   "source": [
    "factor.unstack().head()"
   ],
   "outputs": []
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Run Alphalens"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Period 1 will show returns from market open to market close while period 2 will show returns from today open to tomorrow open"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Get Alphalens Input"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2021-04-19T18:51:21.735159Z",
     "start_time": "2021-04-19T18:51:21.353187Z"
    }
   },
   "source": [
    "non_predictive_factor_data = alphalens.utils.get_clean_factor_and_forward_returns(factor, \n",
    "                                                                                  pricing, \n",
    "                                                                                  periods=(1,2),\n",
    "                                                                                  groupby=ticker_sector,\n",
    "                                                                                  groupby_labels=sector_names)"
   ],
   "outputs": []
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Returns Tear Sheet"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2021-04-19T18:51:23.586574Z",
     "start_time": "2021-04-19T18:51:21.736402Z"
    },
    "scrolled": false
   },
   "source": [
    "alphalens.tears.create_returns_tear_sheet(non_predictive_factor_data)"
   ],
   "outputs": []
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2021-04-19T18:51:25.407194Z",
     "start_time": "2021-04-19T18:51:23.587527Z"
    },
    "scrolled": false
   },
   "source": [
    "alphalens.tears.create_event_returns_tear_sheet(non_predictive_factor_data, pricing);"
   ],
   "outputs": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.8"
  },
  "toc": {
   "base_numbering": 1,
   "nav_menu": {},
   "number_sections": true,
   "sideBar": true,
   "skip_h1_title": false,
   "title_cell": "Table of Contents",
   "title_sidebar": "Contents",
   "toc_cell": false,
   "toc_position": {},
   "toc_section_display": true,
   "toc_window_display": false
  }
 },
 "nbformat": 4,
 "nbformat_minor": 1
}
